Combined the factors which influenced by the epidemic and the various characteristics of each London boroughs, this study not only attempts to provide insight on the impact on house prices caused by the pandemic of the different London boroughs, but also shows the individuals’ changed demand for the house.
Firstly, Let’s library some packages which we will use in this work
library(tidyverse)
## ─ Attaching packages ──────────────────── tidyverse 1.3.0 ─
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ─ Conflicts ───────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(RColorBrewer)
library(classInt)
library(sp)
library(rgeos)
## rgeos version: 0.5-5, (SVN revision 640)
## GEOS runtime version: 3.8.1-CAPI-1.13.3
## Linking to sp version: 1.4-2
## Polygon checking: TRUE
library(tmap)
library(tmaptools)
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
library(rgdal)
## rgdal: version: 1.5-18, (SVN revision 1082)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.1.4, released 2020/10/20
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/proj
## Linking to sp version:1.4-4
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library(geojsonio)
##
## Attaching package: 'geojsonio'
## The following object is masked from 'package:base':
##
## pretty
library(tmaptools)
library(ggplot2)
library(corrplot)
## corrplot 0.84 loaded
library(haven)
library(texreg)
## Version: 1.37.5
## Date: 2020-06-17
## Author: Philip Leifeld (University of Essex)
##
## Consider submitting praise using the praise or praise_interactive functions.
## Please cite the JSS article in your publications -- see citation("texreg").
##
## Attaching package: 'texreg'
## The following object is masked from 'package:tidyr':
##
## extract
library(dplyr)
library(tidyr)
library(broom)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(sf)
library(sp)
library(lwgeom)
## Linking to liblwgeom 3.0.0beta1 r16016, GEOS 3.8.1, PROJ 6.3.1
library(spData)
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Registered S3 methods overwritten by 'spatialreg':
## method from
## residuals.stsls spdep
## deviance.stsls spdep
## coef.stsls spdep
## print.stsls spdep
## summary.stsls spdep
## print.summary.stsls spdep
## residuals.gmsar spdep
## deviance.gmsar spdep
## coef.gmsar spdep
## fitted.gmsar spdep
## print.gmsar spdep
## summary.gmsar spdep
## print.summary.gmsar spdep
## print.lagmess spdep
## summary.lagmess spdep
## print.summary.lagmess spdep
## residuals.lagmess spdep
## deviance.lagmess spdep
## coef.lagmess spdep
## fitted.lagmess spdep
## logLik.lagmess spdep
## fitted.SFResult spdep
## print.SFResult spdep
## fitted.ME_res spdep
## print.ME_res spdep
## print.lagImpact spdep
## plot.lagImpact spdep
## summary.lagImpact spdep
## HPDinterval.lagImpact spdep
## print.summary.lagImpact spdep
## print.sarlm spdep
## summary.sarlm spdep
## residuals.sarlm spdep
## deviance.sarlm spdep
## coef.sarlm spdep
## vcov.sarlm spdep
## fitted.sarlm spdep
## logLik.sarlm spdep
## anova.sarlm spdep
## predict.sarlm spdep
## print.summary.sarlm spdep
## print.sarlm.pred spdep
## as.data.frame.sarlm.pred spdep
## residuals.spautolm spdep
## deviance.spautolm spdep
## coef.spautolm spdep
## fitted.spautolm spdep
## print.spautolm spdep
## summary.spautolm spdep
## logLik.spautolm spdep
## print.summary.spautolm spdep
## print.WXImpact spdep
## summary.WXImpact spdep
## print.summary.WXImpact spdep
## predict.SLX spdep
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## anova.sarlm, as_dgRMatrix_listw, as_dsCMatrix_I, as_dsCMatrix_IrW,
## as_dsTMatrix_listw, as.spam.listw, bptest.sarlm, can.be.simmed,
## cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
## create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
## deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
## fitted.SFResult, fitted.spautolm, get.ClusterOption,
## get.coresOption, get.mcOption, get.VerboseOption,
## get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
## gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
## LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
## predict.SLX, print.gmsar, print.ME_res, print.sarlm,
## print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
## print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
## print.summary.stsls, residuals.gmsar, residuals.sarlm,
## residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
## SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
## SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
## stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
## summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm
library(Matrix)
library(spgwr)
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)
Now Let’s import the data and have quick look of the data
hpa <- read_csv("house_price_analyse.csv")
##
## ─ Column specification ────────────────────────────
## cols(
## area_name = col_character(),
## area_code = col_character(),
## year_by_year_growth = col_double(),
## claimant_count_rate = col_double(),
## cases_per_1000000 = col_number(),
## prop_of_residence = col_double(),
## aver_num_of_open_space = col_double(),
## still_eco_secure = col_double(),
## sales_Oct20 = col_double()
## )
head(hpa)
## # A tibble: 6 x 9
## area_name area_code year_by_year_gr… claimant_count_… cases_per_10000…
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Barking … E09000002 1.31 10.1 7151.
## 2 Barnet E09000003 2.01 7.3 4907.
## 3 Bexley E09000004 3.4 5.7 6285.
## 4 Brent E09000005 4.82 10.3 4964.
## 5 Bromley E09000006 2.69 5.5 5075.
## 6 Camden E09000007 6.52 5.8 3427.
## # … with 4 more variables: prop_of_residence <dbl>,
## # aver_num_of_open_space <dbl>, still_eco_secure <dbl>, sales_Oct20 <dbl>
hpa_num <- dplyr :: select_if(hpa, is.numeric)
Load London borough shape file
Londonborough <- st_read("ESRI/London_Borough_Excluding_MHW.shp")
## Reading layer `London_Borough_Excluding_MHW' from data source `/Users/liyiqing/Desktop/GIS final/gis/ESRI/London_Borough_Excluding_MHW.shp' using driver `ESRI Shapefile'
## Simple feature collection with 33 features and 7 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## projected CRS: OSGB 1936 / British National Grid
st_transform(Londonborough, 27700)
## Simple feature collection with 33 features and 7 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## projected CRS: OSGB 1936 / British National Grid
## First 10 features:
## NAME GSS_CODE HECTARES NONLD_AREA ONS_INNER SUB_2009
## 1 Kingston upon Thames E09000021 3726.117 0.000 F <NA>
## 2 Croydon E09000008 8649.441 0.000 F <NA>
## 3 Bromley E09000006 15013.487 0.000 F <NA>
## 4 Hounslow E09000018 5658.541 60.755 F <NA>
## 5 Ealing E09000009 5554.428 0.000 F <NA>
## 6 Havering E09000016 11445.735 210.763 F <NA>
## 7 Hillingdon E09000017 11570.063 0.000 F <NA>
## 8 Harrow E09000015 5046.330 0.000 F <NA>
## 9 Brent E09000005 4323.270 0.000 F <NA>
## 10 Barnet E09000003 8674.837 0.000 F <NA>
## SUB_2006 geometry
## 1 <NA> MULTIPOLYGON (((516401.6 16...
## 2 <NA> MULTIPOLYGON (((535009.2 15...
## 3 <NA> MULTIPOLYGON (((540373.6 15...
## 4 <NA> MULTIPOLYGON (((521975.8 17...
## 5 <NA> MULTIPOLYGON (((510253.5 18...
## 6 <NA> MULTIPOLYGON (((549893.9 18...
## 7 <NA> MULTIPOLYGON (((510599.8 19...
## 8 <NA> MULTIPOLYGON (((510599.8 19...
## 9 <NA> MULTIPOLYGON (((525201 1825...
## 10 <NA> MULTIPOLYGON (((524579.9 19...
Londonborough <- Londonborough[order(Londonborough$GSS_CODE), ]
head(Londonborough)
## Simple feature collection with 6 features and 7 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 515484.9 ymin: 156480.9 xmax: 554089.2 ymax: 198355.2
## projected CRS: OSGB 1936 / British National Grid
## NAME GSS_CODE HECTARES NONLD_AREA ONS_INNER SUB_2009
## 33 City of London E09000001 314.942 24.546 T <NA>
## 32 Barking and Dagenham E09000002 3779.934 169.150 F <NA>
## 10 Barnet E09000003 8674.837 0.000 F <NA>
## 15 Bexley E09000004 6428.649 370.619 F <NA>
## 9 Brent E09000005 4323.270 0.000 F <NA>
## 3 Bromley E09000006 15013.487 0.000 F <NA>
## SUB_2006 geometry
## 33 <NA> MULTIPOLYGON (((531145.1 18...
## 32 <NA> MULTIPOLYGON (((543905.4 18...
## 10 <NA> MULTIPOLYGON (((524579.9 19...
## 15 <NA> MULTIPOLYGON (((547226.2 18...
## 9 <NA> MULTIPOLYGON (((525201 1825...
## 3 <NA> MULTIPOLYGON (((540373.6 15...
#summary the data
summary(hpa)
## area_name area_code year_by_year_growth claimant_count_rate
## Length:32 Length:32 Min. :-0.420 Min. : 4.800
## Class :character Class :character 1st Qu.: 1.340 1st Qu.: 5.800
## Mode :character Mode :character Median : 3.990 Median : 7.950
## Mean : 4.027 Mean : 7.756
## 3rd Qu.: 5.897 3rd Qu.: 9.200
## Max. :10.340 Max. :10.700
## cases_per_1000000 prop_of_residence aver_num_of_open_space still_eco_secure
## Min. :3427 Min. :50.60 Min. : 4.170 Min. :26.00
## 1st Qu.:4470 1st Qu.:62.55 1st Qu.: 5.032 1st Qu.:34.00
## Median :4963 Median :68.35 Median : 5.750 Median :44.00
## Mean :5130 Mean :66.88 Mean : 6.478 Mean :41.75
## 3rd Qu.:5448 3rd Qu.:72.20 3rd Qu.: 6.955 3rd Qu.:47.25
## Max. :7658 Max. :75.10 Max. :12.710 Max. :56.00
## sales_Oct20
## Min. : 77.0
## 1st Qu.:119.8
## Median :140.0
## Mean :159.0
## 3rd Qu.:188.8
## Max. :315.0
plot the histgram of the house price year by year growth rate distribution
library(ggplot2)
# set up the basic histogram
gghist1 <- ggplot(hpa,
aes(x = year_by_year_growth)) +
geom_histogram(color="black",
fill="white")+
labs(title="House Price Year by Year Growth Rate Distribution",
x="house price year by year growth",
y="Frequency")
gghist1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
library(ggplot2)
# set up the basic histogram
gghist1 <- ggplot(hpa,
aes(x = sqrt(year_by_year_growth))) +
geom_histogram(color="black",
fill="white")+
labs(title="House Price Year by Year Growth Rate Distribution",
x="house price year by year growth",
y="Frequency")
gghist1
## Warning in sqrt(year_by_year_growth): 产生了NaNs
## Warning in sqrt(year_by_year_growth): 产生了NaNs
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).
library(ggplot2)
# set up the basic histogram
gghist1 <- ggplot(hpa,
aes(x = log(year_by_year_growth))) +
geom_histogram(color="black",
fill="white")+
labs(title="House Price Year by Year Growth Rate Distribution",
x="house price year by year growth",
y="Frequency")
gghist1
## Warning in log(year_by_year_growth): 产生了NaNs
## Warning in log(year_by_year_growth): 产生了NaNs
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).
map the house price year by year growth rate
#merge boundaries and data
hpa <- Londonborough%>%
left_join(.,
hpa,
by = c("GSS_CODE" = "area_code"))
hpa <- hpa[-1, ]
#let's map our dependent variable to see if the join has worked:
tmap_mode("view")
## tmap mode set to interactive viewing
qtm(hpa,
fill = "year_by_year_growth",
borders = "black",
fill.palette = "Blues")
map the independent variables
plot(hpa[10:15])
plot the correlation between the features
#create a dadaframe of the numeric datas
corrplot(cor(hpa_num))
ggpairs
ggpairs(hpa_num,axisLabels="none")
Build the models contains all the independent variables
lm.max <- lm(year_by_year_growth ~ ., data = hpa_num)
summary(lm.max)
##
## Call:
## lm(formula = year_by_year_growth ~ ., data = hpa_num)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.2551 -1.4765 -0.0392 1.6454 5.8746
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.1150229 8.7760178 0.583 0.565
## claimant_count_rate -0.0753606 0.6814198 -0.111 0.913
## cases_per_1000000 -0.0005628 0.0006417 -0.877 0.389
## prop_of_residence 0.0174010 0.1322549 0.132 0.896
## aver_num_of_open_space -0.0967192 0.3195321 -0.303 0.765
## still_eco_secure 0.0561146 0.1433867 0.391 0.699
## sales_Oct20 -0.0031184 0.0119674 -0.261 0.797
##
## Residual standard error: 2.982 on 25 degrees of freedom
## Multiple R-squared: 0.11, Adjusted R-squared: -0.1036
## F-statistic: 0.5149 on 6 and 25 DF, p-value: 0.7914
Use Leaps to select the variables
library(leaps)
regsubsets.out <- regsubsets( year_by_year_growth ~ .,
data = hpa_num,
nbest = 1,
nvmax = NULL,
force.in = NULL, force.out = NULL,
method = 'exhaustive')
summary(regsubsets.out)
## Subset selection object
## Call: regsubsets.formula(year_by_year_growth ~ ., data = hpa_num, nbest = 1,
## nvmax = NULL, force.in = NULL, force.out = NULL, method = "exhaustive")
## 6 Variables (and intercept)
## Forced in Forced out
## claimant_count_rate FALSE FALSE
## cases_per_1000000 FALSE FALSE
## prop_of_residence FALSE FALSE
## aver_num_of_open_space FALSE FALSE
## still_eco_secure FALSE FALSE
## sales_Oct20 FALSE FALSE
## 1 subsets of each size up to 6
## Selection Algorithm: exhaustive
## claimant_count_rate cases_per_1000000 prop_of_residence
## 1 ( 1 ) " " "*" " "
## 2 ( 1 ) " " "*" " "
## 3 ( 1 ) " " "*" " "
## 4 ( 1 ) " " "*" " "
## 5 ( 1 ) " " "*" "*"
## 6 ( 1 ) "*" "*" "*"
## aver_num_of_open_space still_eco_secure sales_Oct20
## 1 ( 1 ) " " " " " "
## 2 ( 1 ) " " "*" " "
## 3 ( 1 ) "*" "*" " "
## 4 ( 1 ) "*" "*" "*"
## 5 ( 1 ) "*" "*" "*"
## 6 ( 1 ) "*" "*" "*"
as.data.frame(summary(regsubsets.out)$outmat)
## claimant_count_rate cases_per_1000000 prop_of_residence
## 1 ( 1 ) *
## 2 ( 1 ) *
## 3 ( 1 ) *
## 4 ( 1 ) *
## 5 ( 1 ) * *
## 6 ( 1 ) * * *
## aver_num_of_open_space still_eco_secure sales_Oct20
## 1 ( 1 )
## 2 ( 1 ) *
## 3 ( 1 ) * *
## 4 ( 1 ) * * *
## 5 ( 1 ) * * *
## 6 ( 1 ) * * *
plot(regsubsets.out, scale='adjr2', main='Adjusted Rsq')
According to the results show above, “cases_per_1000000”, “aver_num_of_open_space”, and “still_eco_secure” are the top three variables which are more significant to the dependent variable. So let’s build a model employed these tree variables and check the performance of the model.
lm_sel <- lm(year_by_year_growth ~ cases_per_1000000 + aver_num_of_open_space + still_eco_secure, data = hpa_num)
summary(lm_sel)
##
## Call:
## lm(formula = year_by_year_growth ~ cases_per_1000000 + aver_num_of_open_space +
## still_eco_secure, data = hpa_num)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.037 -1.645 -0.154 1.476 6.018
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.5231079 5.4814034 1.008 0.322
## cases_per_1000000 -0.0006206 0.0005813 -1.068 0.295
## aver_num_of_open_space -0.0776471 0.2387486 -0.325 0.747
## still_eco_secure 0.0524703 0.0725177 0.724 0.475
##
## Residual standard error: 2.824 on 28 degrees of freedom
## Multiple R-squared: 0.1061, Adjusted R-squared: 0.01037
## F-statistic: 1.108 on 3 and 28 DF, p-value: 0.3623
hpa_num <- hpa_num[,c(-2, -4, -7)]
hpa <- hpa[,c(-10, -12, -15)]
As we can see that the new model dose not perform so well. So our final selection is the lm_new.max.
1.the residuals are normally distributed
#save the residuals into the datafram
lm_modeldata_new <- lm_sel %>%
augment(., hpa_num)
#plot residuals
lm_modeldata_new%>%
dplyr::select(.resid)%>%
pull()%>%
qplot()+
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
2.There is no multicolinearity among the independent variables
vif(lm_sel)
## cases_per_1000000 aver_num_of_open_space still_eco_secure
## 1.491353 1.162065 1.459810
3.Homoscedasticity plot the residuals against the predicted values.
par(mfrow=c(2,2)) #plot to 2 by 2 array
plot(lm_sel)
DW <- durbinWatsonTest(lm_sel)
tidy(DW)
## # A tibble: 1 x 5
## statistic p.value autocorrelation method alternative
## <dbl> <dbl> <dbl> <chr> <chr>
## 1 1.57 0.206 0.158 Durbin-Watson Test two.sided
create the new dataframe
#create a new data frame with the transformed data
hpa$lm_resids = lm_sel$residuals
map the residuals
tmap_mode("view")
## tmap mode set to interactive viewing
qtm(hpa,
fill = "lm_resids",
borders = "black",
fill.palette = "-RdBu")
## Variable(s) "lm_resids" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#calculate the centroids of boroughs in London
coordsB <- hpa%>%
st_centroid()%>%
st_geometry()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
plot(coordsB)
#queen's case
LB_nb <- hpa %>%
poly2nb(., queen=T)
#nearest neighbours
knn_B1 <-coordsB %>%
knearneigh(., k=3)
knn_B2 <-coordsB %>%
knearneigh(., k=5)
LB_knn3 <- knn_B1 %>%
knn2nb()
LB_knn5 <- knn_B2 %>%
knn2nb()
#plot them
plot(LB_nb, st_geometry(coordsB), col="red")
plot(LB_knn3, st_geometry(coordsB), col="blue")
plot(LB_knn5, st_geometry(coordsB), col="blue")
#create a spatial weights matrix object from these weights
LB_queens_weight <- LB_nb %>%
nb2listw(., style="C")
LB.knn_3_weight <- LB_knn3 %>%
nb2listw(., style="C")
LB.knn_5_weight <- LB_knn5 %>%
nb2listw(., style="C")
Queen <- hpa %>%
st_drop_geometry()%>%
dplyr::select(lm_resids)%>%
pull()%>%
moran.test(., LB_queens_weight)%>%
tidy()
Queen
## # A tibble: 1 x 7
## estimate1 estimate2 estimate3 statistic p.value method alternative
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -0.139 -0.0323 0.0135 -0.919 0.821 Moran I test unde… greater
Nearest_neighbour1 <- hpa %>%
st_drop_geometry()%>%
dplyr::select(lm_resids)%>%
pull()%>%
moran.test(., LB.knn_3_weight)%>%
tidy()
Nearest_neighbour1
## # A tibble: 1 x 7
## estimate1 estimate2 estimate3 statistic p.value method alternative
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -0.0863 -0.0323 0.0163 -0.423 0.664 Moran I test unde… greater
Nearest_neighbour2 <- hpa %>%
st_drop_geometry()%>%
dplyr::select(lm_resids)%>%
pull()%>%
moran.test(., LB.knn_5_weight)%>%
tidy()
Nearest_neighbour2
## # A tibble: 1 x 7
## estimate1 estimate2 estimate3 statistic p.value method alternative
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -0.0579 -0.0323 0.00885 -0.272 0.607 Moran I test unde… greater
slm_queen <- lagsarlm(year_by_year_growth ~ cases_per_1000000 +
aver_num_of_open_space + still_eco_secure,
data = hpa,
nb2listw(LB_nb, style="C"),
method = "eigen")
#what do the outputs show?
summary(slm_queen)
##
## Call:lagsarlm(formula = year_by_year_growth ~ cases_per_1000000 +
## aver_num_of_open_space + still_eco_secure, data = hpa, listw = nb2listw(LB_nb,
## style = "C"), method = "eigen")
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.96269 -1.64203 -0.14911 1.53668 6.15674
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.17264229 5.77035958 1.0697 0.2847
## cases_per_1000000 -0.00067612 0.00058258 -1.1606 0.2458
## aver_num_of_open_space -0.08974996 0.22447101 -0.3998 0.6893
## still_eco_secure 0.05019442 0.06912625 0.7261 0.4678
##
## Rho: -0.046, LR test value: 0.04416, p-value: 0.83356
## Asymptotic standard error: 0.21148
## z-value: -0.21751, p-value: 0.82781
## Wald statistic: 0.047311, p-value: 0.82781
##
## Log likelihood: -76.46988 for lag model
## ML residual variance (sigma squared): 6.9654, (sigma: 2.6392)
## Number of observations: 32
## Number of parameters estimated: 6
## AIC: 164.94, (AIC for lm: 162.98)
## LM test for residual autocorrelation
## test value: 3.0361, p-value: 0.081431
hpa <- hpa %>%
mutate(slm_queen_resids = residuals(slm_queen))
QueenMoran <- hpa %>%
st_drop_geometry()%>%
dplyr::select(slm_queen_resids)%>%
pull()%>%
moran.test(., LB_queens_weight)%>%
tidy()
QueenMoran
## # A tibble: 1 x 7
## estimate1 estimate2 estimate3 statistic p.value method alternative
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 -0.120 -0.0323 0.0135 -0.752 0.774 Moran I test unde… greater
sem_queen <- errorsarlm(year_by_year_growth ~ cases_per_1000000 +
aver_num_of_open_space + still_eco_secure,
data = hpa,
nb2listw(LB_nb, style="C"),
method = "eigen")
summary(sem_queen)
##
## Call:errorsarlm(formula = year_by_year_growth ~ cases_per_1000000 +
## aver_num_of_open_space + still_eco_secure, data = hpa, listw = nb2listw(LB_nb,
## style = "C"), method = "eigen")
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.62616 -1.61717 0.12334 1.49409 6.16708
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.41138255 4.18272878 1.5328 0.12532
## cases_per_1000000 -0.00073080 0.00043727 -1.6713 0.09467
## aver_num_of_open_space -0.16321491 0.19572110 -0.8339 0.40433
## still_eco_secure 0.05941741 0.05443118 1.0916 0.27501
##
## Lambda: -0.34605, LR test value: 1.5778, p-value: 0.20908
## Asymptotic standard error: 0.26434
## z-value: -1.3091, p-value: 0.1905
## Wald statistic: 1.7138, p-value: 0.1905
##
## Log likelihood: -75.70308 for error model
## ML residual variance (sigma squared): 6.4553, (sigma: 2.5407)
## Number of observations: 32
## Number of parameters estimated: 6
## AIC: 163.41, (AIC for lm: 162.98)
queen.listw <- LB_queens_weight
listw1 <- queen.listw
localmi <- localmoran(hpa$year_by_year_growth, listw=listw1, alternative = "greater")
localmi
## Ii E.Ii Var.Ii Z.Ii Pr(z > 0)
## 2 0.474777015 -0.02457757 0.1729812 1.20063103 0.114947179
## 3 0.047633769 -0.04096262 0.2687297 0.17090635 0.432148703
## 4 0.083736320 -0.01638505 0.1192353 0.28995085 0.385926919
## 5 -0.019092158 -0.05734767 0.3488203 0.06477287 0.474177422
## 6 0.233281163 -0.04915515 0.3107323 0.50667253 0.306192303
## 7 -0.109140001 -0.04096262 0.2687297 -0.13151717 0.552316898
## 8 0.286296970 -0.03277010 0.2228127 0.67594601 0.249537461
## 9 -1.050233956 -0.04096262 0.2687297 -1.94692893 0.974228372
## 10 0.008964112 -0.02457757 0.1729812 0.08064648 0.467861552
## 11 0.127959929 -0.02457757 0.1729812 0.36675593 0.356900540
## 12 -0.293041759 -0.04096262 0.2687297 -0.48627178 0.686612757
## 13 -1.472841412 -0.03277010 0.2228127 -3.05080206 0.998858845
## 14 0.004270353 -0.04915515 0.3107323 0.09584190 0.461823069
## 15 -0.301245603 -0.03277010 0.2228127 -0.56876741 0.715243001
## 16 0.236960576 -0.01638505 0.1192353 0.73368733 0.231569661
## 17 -0.003915200 -0.02457757 0.1729812 0.04967990 0.480188737
## 18 -0.054424607 -0.03277010 0.2228127 -0.04587524 0.518295157
## 19 -0.091740296 -0.02457757 0.1729812 -0.16148375 0.564143794
## 20 -1.277765927 -0.02457757 0.1729812 -3.01312307 0.998707130
## 21 1.096541778 -0.03277010 0.2228127 2.39245582 0.008368021
## 22 -0.008825127 -0.04096262 0.2687297 0.06199465 0.475283548
## 23 -0.146758059 -0.02457757 0.1729812 -0.29376657 0.615531858
## 24 1.393099625 -0.04096262 0.2687297 2.76636935 0.002834215
## 25 0.388049675 -0.04096262 0.2687297 0.82758365 0.203953166
## 26 -0.044127480 -0.03277010 0.2228127 -0.02406070 0.509597905
## 27 0.523661556 -0.02457757 0.1729812 1.31816734 0.093723817
## 28 0.116540759 -0.02457757 0.1729812 0.33930007 0.367191844
## 29 0.036214389 -0.02457757 0.1729812 0.14616611 0.441895131
## 30 0.003468807 -0.01638505 0.1192353 0.05749664 0.477074791
## 31 -0.373094377 -0.04096262 0.2687297 -0.64069681 0.739140157
## 32 0.909695544 -0.03277010 0.2228127 1.99662065 0.022933204
## 33 -1.051223044 -0.02457757 0.1729812 -2.46843113 0.993214662
## attr(,"call")
## localmoran(x = hpa$year_by_year_growth, listw = listw1, alternative = "greater")
## attr(,"class")
## [1] "localmoran" "matrix" "array"
hpa$hpmi <- localmi[,1]
tmap_mode("view")
## tmap mode set to interactive viewing
qtm(hpa,
fill = "hpmi",
borders = "black",
fill.palette = "Blues")
#7.5 GWR
st_crs(hpa) = 27700
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
hpa <- hpa %>%
as(., "Spatial")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Airy 1830 ellipsoid in CRS
## definition
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum OSGB 1936 in CRS definition
st_crs(coordsB) = 2770
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
coordsBSP <- coordsB %>%
as(., "Spatial")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in CRS definition
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum NAD83 (High Accuracy Reference Network) in CRS
## definition
GWRbandwidth <- gwr.sel(year_by_year_growth ~ cases_per_1000000 +
aver_num_of_open_space + still_eco_secure,
data = hpa,
coords=coordsBSP,
adapt=T)
## Warning in gwr.sel(year_by_year_growth ~ cases_per_1000000 +
## aver_num_of_open_space + : data is Spatial* object, ignoring coords argument
## Adaptive q: 0.381966 CV score: 299.4925
## Adaptive q: 0.618034 CV score: 291.8842
## Adaptive q: 0.763932 CV score: 289.2583
## Adaptive q: 0.9325207 CV score: 287.3688
## Adaptive q: 0.8681255 CV score: 287.9386
## Adaptive q: 0.9582955 CV score: 287.298
## Adaptive q: 0.9657041 CV score: 287.2862
## Adaptive q: 0.978804 CV score: 287.1715
## Adaptive q: 0.9869001 CV score: 287.1073
## Adaptive q: 0.9919038 CV score: 287.0754
## Adaptive q: 0.9949963 CV score: 287.058
## Adaptive q: 0.9969075 CV score: 287.0481
## Adaptive q: 0.9980888 CV score: 287.0422
## Adaptive q: 0.9988188 CV score: 287.0387
## Adaptive q: 0.99927 CV score: 287.0365
## Adaptive q: 0.9995488 CV score: 287.0352
## Adaptive q: 0.9997212 CV score: 287.0344
## Adaptive q: 0.9998277 CV score: 287.0339
## Adaptive q: 0.9998935 CV score: 287.0336
## Adaptive q: 0.9999342 CV score: 287.0334
## Adaptive q: 0.9999342 CV score: 287.0334
## Warning in gwr.sel(year_by_year_growth ~ cases_per_1000000 +
## aver_num_of_open_space + : Bandwidth converged to upper bound:1
build GWR model
gwr.model = gwr(year_by_year_growth ~ cases_per_1000000 +
aver_num_of_open_space + still_eco_secure,
data = hpa,
coords=coordsBSP,
adapt=GWRbandwidth,
hatmatrix=TRUE,
se.fit=TRUE)
## Warning in gwr(year_by_year_growth ~ cases_per_1000000 + aver_num_of_open_space
## + : data is Spatial* object, ignoring coords argument
## Warning in proj4string(data): CRS object has comment, which is lost in output
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Airy 1830 ellipsoid in CRS
## definition
gwr.model
## Call:
## gwr(formula = year_by_year_growth ~ cases_per_1000000 + aver_num_of_open_space +
## still_eco_secure, data = hpa, coords = coordsBSP, adapt = GWRbandwidth,
## hatmatrix = TRUE, se.fit = TRUE)
## Kernel function: gwr.Gauss
## Adaptive quantile: 0.9999342 (about 31 of 32 data points)
## Summary of GWR coefficient estimates at data points:
## Min. 1st Qu. Median 3rd Qu.
## X.Intercept. 4.95160842 5.04396480 5.16991195 5.34675252
## cases_per_1000000 -0.00061984 -0.00059530 -0.00057082 -0.00055833
## aver_num_of_open_space -0.10744676 -0.09296139 -0.08211238 -0.07348976
## still_eco_secure 0.60822096 0.63670620 0.66842729 0.68615488
## Max. Global
## X.Intercept. 5.62925403 5.5231
## cases_per_1000000 -0.00053395 -0.0006
## aver_num_of_open_space -0.05697641 -0.0776
## still_eco_secure 0.69308843 0.0525
## Number of data points: 32
## Effective number of parameters (residual: 2traceS - traceS'S): 4.694045
## Effective degrees of freedom (residual: 2traceS - traceS'S): 27.30595
## Sigma (residual: 2traceS - traceS'S): 2.834778
## Effective number of parameters (model: traceS): 4.363394
## Effective degrees of freedom (model: traceS): 27.63661
## Sigma (model: traceS): 2.817769
## Sigma (ML): 2.618621
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 165.8109
## AIC (GWR p. 96, eq. 4.22): 156.7849
## Residual sum of squares: 219.4297
## Quasi-global R2: 0.121715
analyse the results of GWR
results <- as.data.frame(gwr.model$SDF)
names(results)
## [1] "sum.w" "X.Intercept."
## [3] "cases_per_1000000" "aver_num_of_open_space"
## [5] "still_eco_secure" "X.Intercept._se"
## [7] "cases_per_1000000_se" "aver_num_of_open_space_se"
## [9] "still_eco_secure_se" "gwr.e"
## [11] "pred" "pred.se"
## [13] "localR2" "still_eco_secure_EDF"
## [15] "X.Intercept._se_EDF" "cases_per_1000000_se_EDF"
## [17] "aver_num_of_open_space_se_EDF" "still_eco_secure_se_EDF"
## [19] "pred.se.1"
hpa_gwr <- hpa
hpa_gwr$coefcasesrate <- results$cases_per_1000000
hpa_gwr$coefaveros <- results$aver_num_of_open_space
hpa_gwr$coefeco <- results$still_eco_secure
hpa_gwr$localr <- results$localR2
run significance test
#run the significance test
sigTest_cases = abs(gwr.model$SDF$"cases_per_1000000")-2 * gwr.model$SDF$"cases_per_1000000_se"
#store significance results
hpa_gwr$casesig = sigTest_cases
#run the significance test
sigTest_aver = abs(gwr.model$SDF$"aver_num_of_open_space")-2 * gwr.model$SDF$"aver_num_of_open_space_se"
#store significance results
hpa_gwr$aversig = sigTest_aver
#run the significance test
sigTest_eco = abs(gwr.model$SDF$"still_eco_secure")-2 * gwr.model$SDF$"still_eco_secure_se"
#store significance results
hpa_gwr$ecosig = sigTest_eco
map the sdandard errors
tm_shape(hpa_gwr) +
tm_polygons(col = "casesig",
palette = "Blues",
alpha = 0.6)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
tm_shape(hpa_gwr) +
tm_polygons(col = "aversig",
palette = "Blues",
alpha = 0.6)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
tm_shape(hpa_gwr) +
tm_polygons(col = "ecosig",
palette = "Reds",
alpha = 0.6)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output